In [40]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Chisquare distribution


In [41]:
#Generate a random variable with 100 datapoints for a Chisquare distribution
chisquare = np.random.chisquare(2, 100)

In [42]:
#plot Chisquare distribution using a histogram
plt.hist(chisquare)
plt.show()



In [43]:
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(chisquare)
plt.axvline(chisquare.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(chisquare.mean() + chisquare.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(chisquare.mean()-chisquare.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [44]:
#Evaluate descriptive statistics against data
c1 = np.mean(chisquare)
c2 = np.median(chisquare)
c3= np.var(chisquare)
c4 = np.std(chisquare,ddof=1)
c5 = np.std(chisquare ,ddof=1) / np.sqrt(len(chisquare))

print('chisquare')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['chisquare'] = np.random.choice(chisquare, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)


chisquare
2.1206786374
1.48676337177
4.49504840255
2.13083385834
0.213083385834
chisquare    2.207264
dtype: float64
chisquare    2.237704
dtype: float64

Dirichlet distribution


In [45]:
#Generate a random variable with 100 datapoints for a Chisquare distribution
dirichlet = np.random.dirichlet((1,2), 100)

In [46]:
#plot Dirichlet distribution using a histogram
plt.hist(dirichlet)
plt.show()



In [47]:
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(dirichlet)
plt.axvline(dirichlet.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(dirichlet.mean() + dirichlet.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(dirichlet.mean()-dirichlet.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [48]:
#Evaluate descriptive statistics against data
d1 = np.mean(dirichlet)
d2 = np.median(dirichlet)
d3= np.var(dirichlet)
d4 = np.std(dirichlet,ddof=1)
d5 = np.std(dirichlet ,ddof=1) / np.sqrt(len(dirichlet))

print('dirichlet')
print(d1)
print(d2)
print(d3)
print(d4)
print(d5)


dirichlet
0.5
0.5
0.0938408165359
0.307103204848
0.0307103204848

Standard t distribution


In [49]:
#Generate a random variable with 100 datapoints for a standard t distribution
standard_t = np.random.standard_t(50, 100)

In [50]:
#plot Standard t distribution using a histogram
plt.hist(standard_t)
plt.show()



In [51]:
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(standard_t)
plt.axvline(standard_t.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(standard_t.mean() + standard_t.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(standard_t.mean()-standard_t.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [52]:
#Evaluate descriptive statistics against data
c1 = np.mean(standard_t)
c2 = np.median(standard_t)
c3= np.var(standard_t)
c4 = np.std(standard_t,ddof=1)
c5 = np.std(standard_t ,ddof=1) / np.sqrt(len(standard_t))

print('standard_t')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['standard_t'] = np.random.choice(standard_t, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)


standard_t
-0.0152853606037
0.0733338216136
1.087539194
1.04810516571
0.104810516571
standard_t   -0.038865
dtype: float64
standard_t    0.971557
dtype: float64

Logistic distribution


In [53]:
#Generate a random variable with 100 datapoints for logistic distribution
logistic = np.random.logistic(9,2, 100)

In [54]:
#plot logistic distribution using a histogram
plt.hist(logistic)
plt.show()



In [55]:
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(logistic)
plt.axvline(logistic.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(logistic.mean() + logistic.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(logistic.mean()-logistic.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [56]:
#Evaluate descriptive statistics against data
c1 = np.mean(logistic)
c2 = np.median(logistic)
c3= np.var(logistic)
c4 = np.std(logistic,ddof=1)
c5 = np.std(logistic ,ddof=1) / np.sqrt(len(logistic))

print('standard_t')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['logistic'] = np.random.choice(logistic, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)


standard_t
9.29508652954
9.53696067606
15.2993318944
3.93114113209
0.393114113209
logistic    9.273761
dtype: float64
logistic    3.940146
dtype: float64

Rayleigh distribution


In [57]:
#Generate a random variable with 100 datapoints for rayleigh distribution
rayleigh = np.random.rayleigh(1, 100)

In [58]:
#plot Rayleigh distribution using a histogram
plt.hist(rayleigh)
plt.show()



In [59]:
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(rayleigh)
plt.axvline(rayleigh.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rayleigh.mean() + rayleigh.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rayleigh.mean()-rayleigh.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [60]:
#Evaluate descriptive statistics against data
c1 = np.mean(rayleigh)
c2 = np.median(rayleigh)
c3= np.var(rayleigh)
c4 = np.std(rayleigh,ddof=1)
c5 = np.std(rayleigh ,ddof=1) / np.sqrt(len(rayleigh))

print('rayleigh')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['rayleigh'] = np.random.choice(rayleigh, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)


rayleigh
1.16842910258
0.959254226449
0.511401535514
0.718726100535
0.0718726100535
rayleigh    1.281252
dtype: float64
rayleigh    0.78202
dtype: float64

Geometric distribution


In [61]:
#Generate a random variable with 100 datapoints for Geometric distribution
geometric = np.random.geometric(0.25, 100)

In [62]:
#plot Geometric distribution using a histogram
plt.hist(geometric)
plt.show()



In [63]:
#Compute mean and standard deviation & plot as vertical lines on the histogram
plt.hist(geometric)
plt.axvline(geometric.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(geometric.mean() + geometric.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(geometric.mean()-geometric.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [64]:
#Evaluate descriptive statistics against data
c1 = np.mean(geometric)
c2 = np.median(geometric)
c3= np.var(geometric)
c4 = np.std(geometric,ddof=1)
c5 = np.std(geometric ,ddof=1) / np.sqrt(len(geometric))

print('geometric')
print(c1)
print(c2)
print(c3)
print(c4)
print(c5)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['geometric'] = np.random.choice(geometric, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
c6 = sample.mean()
c7 = sample.std(ddof=1)
print(c6)
print(c7)


geometric
3.92
3.0
9.6136
3.11620074301
0.311620074301
geometric    3.99
dtype: float64
geometric    2.931869
dtype: float64

In [65]:
#Generation of 2 normally-distributed variables, one with mean 5 and standard deviation of 0.5 and the other with mean of 10 and standar deviation of 1
rand1 = np.random.normal(5, 0.5, 1000)
rand2 = np.random.normal(10,1, 1000)

In [66]:
#Generate a 3rd variable adding the 2 normallt distributed variables
rand3= rand1+rand2

In [67]:
#Plot histogram of the 3rd variable
plt.hist(rand3, bins=20, color='c')
plt.show()



In [68]:
#Compute and plot the mean and standard deviation for variable rand3
plt.hist(rand3, bins=20, color='c')
plt.axvline(rand3.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rand3.mean() + rand3.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rand3.mean()-rand3.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [69]:
#Calculate descriptive statistics for variable rand3
a = np.mean(rand3)
b = np.median(rand3)
c= np.var(rand3)
d = np.std(rand3,ddof=1)
e = np.std(rand3 ,ddof=1) / np.sqrt(len(rand3))
print(a)
print(b)
print(c)
print(d)
print(e)


15.0740718589
15.0825946634
1.20027793015
1.09612016201
0.0346623630118

In [70]:
#Plot a sample for rand3
sample=pd.DataFrame()
sample['variable3'] = np.random.choice(rand3, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
f = sample.mean()
g = sample.std(ddof=1)
print(f)
print(g)


variable3    15.144762
dtype: float64
variable3    1.114334
dtype: float64

In [71]:
#Plot histogram of the 3 variabless
plt.hist(rand1, bins=20, color = 'b')
plt.hist(rand2, bins=20, color='r')
plt.hist(rand3, bins=20, color='c')
plt.show()



In [72]:
#Compute and plot the mean and standard deviation for each of the variables
plt.hist(rand1, bins=20, color = 'b')
plt.hist(rand2, bins=20, color='r')
plt.hist(rand3, bins=20, color='c')

plt.axvline(rand1.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rand1.mean() + rand1.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rand1.mean()-rand1.std(), color='b', linestyle='dashed', linewidth=2)

plt.axvline(rand2.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rand2.mean() + rand2.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rand2.mean()-rand2.std(), color='b', linestyle='dashed', linewidth=2)

plt.axvline(rand3.mean(), color='b', linestyle='solid', linewidth=2)
plt.axvline(rand3.mean() + rand3.std(), color='b', linestyle='dashed', linewidth=2)
plt.axvline(rand3.mean()-rand3.std(), color='b', linestyle='dashed', linewidth=2)
plt.show()



In [73]:
#Evaluate descriptive statistics against data

a2 = np.mean(rand2)
b2 = np.median(rand2)
c2= np.var(rand2)
d2 = np.std(rand2,ddof=1)
e2 = np.std(rand2 ,ddof=1) / np.sqrt(len(rand2))

print('rand2')
print(a2)
print(b2)
print(c2)
print(d2)
print(e2)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['variable2'] = np.random.choice(rand2, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
f2 = sample.mean()
g2 = sample.std(ddof=1)
print(f2)
print(g2)


rand2
10.0572601977
10.0314504823
0.973282720879
0.98704456731
0.031213089848
variable2    10.112494
dtype: float64
variable2    0.99336
dtype: float64

In [74]:
#Evaluate descriptive statistics against data
a1 = np.mean(rand1)
b1 = np.median(rand1)
c1= np.var(rand1)
d1 = np.std(rand1,ddof=1)
e1 = np.std(rand1 ,ddof=1) / np.sqrt(len(rand1))

print('rand1')
print(a1)
print(b1)
print(c1)
print(d1)
print(e1)

#Plot a sample for rand3
sample=pd.DataFrame()
sample['variable1'] = np.random.choice(rand1, 100)

# Again, visualize the data.
sample.hist()
plt.show()

# Check how well the sample replicates the population.
f1 = sample.mean()
g1 = sample.std(ddof=1)
print(f1)
print(g1)


rand1
5.01681166115
5.01573361938
0.253057309079
0.503299731471
0.015915734972
variable1    4.888688
dtype: float64
variable1    0.491917
dtype: float64

In [ ]: